################################################## #
###Replications: Climate Framing - Datacleaning for LASSOplus#####
################################################## #
#Paper Title: Systematic mapping of climate and environmental framing experiments and re-analysis with computational methods points to omitted interaction bias 
#Authors: Lukas Fesenfeld*,**,†, Liam Beiser-McGrath***, Yixian Sun****, Michael Wicki*, Thomas Bernauer*
#Current affiliations:*University of Bern, Bern, Switzerland; **ETH Zürich, Zürich, Switzerland;***Royal Holloway, University of London, London, UK; ****University of Bath, Bath, UK
#†Corresponding author: Lukas Fesenfeld, lukas.fesenfeld@unibe.ch, University of Bern, Fabrikstrasse 8, 3012 Bern, Switzerland


# NOTE(review): rm(list = ls()) deletes every object in the global environment
# of whoever sources this script; it does not detach packages or reset options.
# Running the script in a fresh R session is the safer way to get a clean state.
rm(list = ls())
#This script cleans and prepares the original data for a re-analysis with LASSOplus

#install.packages("sparsereg", dependencies = TRUE)
library(tidyverse)
library(foreign)
library(here)
library(sparsereg)
library(readstata13)
library(fastDummies)
library(dplyr)
library(plyr)
library(dotwhisker)
library(imputeTS)

################################################################################################ #
#ID 17: Renewable energy policy design and framing influence public support in the United States#---------
################################################################################################ #
#Original experiment reports heterogeneous effects by pid3_leaners (PartyID)

# RPS experiment data: the .RData file supplies an object called `master`,
# which is copied to id17_exp.
load("./03_Replication_Data_Files/ID17_StokesWarshaw2017/og_data/rps_experiment.RData")
id17_exp <- master

# Replace the arm codes of the five randomised treatments with labelled factor
# levels (revalue() comes from plyr).
id17_exp$Randomization2b <- revalue(
  as.factor(id17_exp$Randomization2b),
  c("0" ="Control", "1"= "Increase costs $2 per month", "2"="Increase costs $10 per month")
)
id17_exp$Randomization3b <- revalue(
  as.factor(id17_exp$Randomization3b),
  c("0" ="Control", "1"= "No increase in jobs", "2"="Large Increase in jobs")
)
id17_exp$Randomization4b <- revalue(
  as.factor(id17_exp$Randomization4b),
  c("0" ="Control", "1"= "Would reduce harmful air pollution")
)
id17_exp$Randomization5b <- revalue(
  as.factor(id17_exp$Randomization5b),
  c("0" ="Control", "1"= "Anti-Climate Change argument", "2"="Pro-Climate Change argument", "3"="Balanced argument")
)
id17_exp$Randomization1b <- revalue(
  as.factor(id17_exp$Randomization1b),
  c("0" ="Control", "1"= "Most Democrats Support", "2"="Most Republicans Support")
)


### ID 17: Preparing LassoPlus df

## Row identifier (seq_len() also behaves for an empty data frame)
id <- seq_len(nrow(id17_exp))
id17_lasso <- data.frame(id)

## Build all model columns in one step.
# - The Randomization*b columns of id17_exp are already labelled factors at
#   this point, so they are copied as-is; calling revalue() on them again would
#   only emit "values not present" messages.
# - Each treatment is stored twice on purpose: under a short name
#   (Costs, Jobs, ...) and under its original Randomization*b name.
# - dplyr::mutate is named explicitly because plyr is attached after dplyr and
#   masks mutate().
id17_lasso <- id17_lasso %>%
  dplyr::mutate(
    billsup = as.numeric(id17_exp$billsup),
    Costs = as.factor(id17_exp$Randomization2b),
    Jobs = as.factor(id17_exp$Randomization3b),
    Air = as.factor(id17_exp$Randomization4b),
    Climate = as.factor(id17_exp$Randomization5b),
    Messenger = as.factor(id17_exp$Randomization1b),
    Randomization2b = as.factor(id17_exp$Randomization2b),
    Randomization3b = as.factor(id17_exp$Randomization3b),
    Randomization4b = as.factor(id17_exp$Randomization4b),
    Randomization5b = as.factor(id17_exp$Randomization5b),
    Randomization1b = as.factor(id17_exp$Randomization1b),
    # party ID as the integer code of its factor levels
    pid3_leaners = as.numeric(as.factor(id17_exp$pid3_leaners)),
    income2 = as.numeric(id17_exp$income2),
    age = as.numeric(id17_exp$age),
    # education: position on the 6-step scale below, NA for any other value
    edu = as.numeric(match(
      as.character(id17_exp$educ),
      c("Did not graduate from high school",
        "High school graduate",
        "Some college, but no degree",
        "2-year college degree",
        "4-year college degree",
        "Postgraduate degree (MA, MBA, MD, JD, PhD, etc.)")
    )),
    # race: black = 1, hispanic = 2, white = 3, other = 4, anything else NA
    race = as.numeric(match(
      as.character(id17_exp$race),
      c("black", "hispanic", "white", "other")
    )),
    # 0/1 indicators; a missing source value stays NA
    female = as.numeric(id17_exp$gender == "female"),
    rep = as.numeric(id17_exp$pid3_leaners == "Republican"),
    demo = as.numeric(id17_exp$pid3_leaners == "Democrat"),
    indep = as.numeric(id17_exp$pid3_leaners == "Independent")
  )

## Fix the level order of the cost treatment so "Control" is the reference
id17_lasso$Randomization2b <- factor(id17_lasso$Randomization2b, levels = c("Control","Increase costs $2 per month","Increase costs $10 per month"))
levels(id17_lasso$Randomization2b)

##drop all rows with any missing
id17_lasso <- id17_lasso %>% drop_na()



################################################################################### #
# ID 35: Costs, benefits, and the malleability of public support for Fracking ######
################################################################################### #

### Load DF ###
id35 <- read.dta13(
  "./03_Replication_Data_Files/ID35_ChristensonGoldfarb2017/og_data/cgk_replication_data.dta",
  missing.type = TRUE
)

# Working copy with a running row number in column `id35`
id35_lasso <- id35
id35_lasso$id35 <- seq_len(nrow(id35))

# Remove columns that are not needed for the re-analysis
id35_lasso <- dplyr::select(
  id35_lasso,
  -c(caseid , weight, starttime, endtime, wells, gallonsofwater)
)

# Show the class of every remaining column
data.frame(sapply(id35_lasso, class))

## Recoded covariates and outcomes (appended to the frame in this order)

# age from year of birth, relative to 2015
id35_lasso$age <- 2015 - id35_lasso$birthyr

# male = 1 when gender is coded 1, otherwise 0
id35_lasso$male <- ifelse(id35_lasso$gender == 1, 1, 0)

# white = 1 when race is coded 1, otherwise 0
id35_lasso$white <- ifelse(id35_lasso$race == 1, 1, 0)

# party: pid7 is copied unchanged
# (original codefile: pid7[1,2,3] = dem5, pid7[5,6,7] = gop5, pid7[4] = ind5)
id35_lasso$republican <- id35_lasso$pid7

# climate change is serious: BOS102 in {1, 2} -> 1, any other code -> 0
id35_lasso$CCserious <- ifelse(id35_lasso$BOS102 == 1 | id35_lasso$BOS102 == 2, 1, 0)

# fracking outcome, binary: BOS103 in {1, 2} -> 1, any other code -> 0
id35_lasso$DVfrackingsupport2 <- ifelse(id35_lasso$BOS103 == 1 | id35_lasso$BOS103 == 2, 1, 0)

# fracking outcome, 3 steps: BOS103 1/2 -> 2, 3 -> 1, 4/5 -> 0, otherwise NA
id35_lasso$DVfrackingsupport3 <- c(2, 2, 1, 0, 0)[match(id35_lasso$BOS103, 1:5)]

# fracking outcome, 5 steps, reversed: BOS103 1..5 -> 5..1, otherwise NA
id35_lasso$DVfrackingsupport <- c(5, 4, 3, 2, 1)[match(id35_lasso$BOS103, 1:5)]

# Drop the unused income column and every source column recoded above
id35_lasso <- dplyr::select(
  id35_lasso,
  -c(faminc, birthyr, gender, race, pid7, BOS102, BOS103)
)


## Treatment arm, taken from BOS103_rand.
# The arm coding is presumed from the Stata file (1 = control, 2 = benefits,
# 3 = costs, 4 = benefits and costs) - TODO confirm against the codebook.
# All four arms are read from BOS103_rand. The outcome column BOS103 has already
# been removed by this point, so `id35_lasso$BOS103` would only reach this
# column through `$` partial matching.
id35_lasso$treat <- c("control", "benefits", "costs", "benefitsAcosts")[
  match(id35_lasso$BOS103_rand, 1:4)
]
id35_lasso <- id35_lasso %>% dplyr::select(-BOS103_rand)


#ideology is used in recoded original stata script like this: * Ideology * | gen ideology = ideo5| recode ideology (6=3)| gen ideology2 = ideo5| replace ideology2 = . if ideology2 == 6

##religpew in analysis included but not recoded

#exclude variables that are not used
# Every name sits inside -c(...): with a bare `-marstat, employ, ...` only
# marstat is removed and the remaining names are read as columns to keep.
id35_lasso <- id35_lasso %>%
  dplyr::select(-c(marstat, employ, pid3, inputstate, votereg, fips_state))




## Check missings

# Number of NA cells per column, as a named integer vector
id35_na <- vapply(id35_lasso, function(column) sum(is.na(column)), integer(1))

data.frame(id35_na)

# Row count before listwise deletion
all_id35 <- nrow(id35_lasso)

# Listwise deletion of incomplete rows (author's note: religpew=19)
id35_lasso <- drop_na(id35_lasso)

# Number of rows removed (author's note: 19)
all_id35 - nrow(id35_lasso) 


# Treatment arm as factor
id35_lasso$treat <- as.factor(id35_lasso$treat)

# Print column classes and the final data
data.frame(sapply(id35_lasso, class))
data.frame(id35_lasso)

########################################################################################################################### #
#ID 41: How issue frames shape beliefs about the importance of climate change policy across ideological and partisan groups############
########################################################################################################################### #

#Original experiment reports heterogeneous effects by PartyID and Ideology

### Load DF ###
id41 <- read.dta13(
  "./03_Replication_Data_Files/ID41_SinghSwanson2017/dataverse_files_Singh_Swanson/Survey_Final.dta",
  missing.type = TRUE
)

# Party ID as text label: code 1/2/3 -> Democrat/Independent/Republican,
# any other value -> NA
id41$party_id_f <- c("Democrat", "Independent", "Republican")[
  match(as.character(as.factor(id41$party_id)), c("1", "2", "3"))
]

table(id41$party_id_f )

# Treatment indicators: a missing flag means "not in this arm", so set it to 0
id41_flag_cols <- c(
  "enviro_treatment_source", "enviro_treatment",
  "displace_treatment_source", "displace_treatment",
  "defense_treatment_source", "defense_treatment",
  "control"
)
id41[id41_flag_cols] <- lapply(
  id41[id41_flag_cols],
  function(flag) replace(flag, is.na(flag), 0)
)

# One treatment label per respondent; the first flag equal to 1 (in the order
# listed) decides, and rows with no flag set get NA
id41 <- id41 %>%
  dplyr::mutate(treat = dplyr::case_when(
    control == 1 ~ "control",
    enviro_treatment == 1 ~ "enviro",
    enviro_treatment_source == 1 ~ "enviro_source",
    displace_treatment == 1 ~ "displace",
    displace_treatment_source == 1 ~ "displace_source",
    defense_treatment == 1 ~ "defense",
    defense_treatment_source == 1 ~ "defense_source",
    TRUE ~ NA_character_
  ))

# Factor with "control" as first (reference) level
id41$treat <- factor(
  id41$treat,
  levels = c("control", "enviro", "enviro_source",
             "displace", "displace_source",
             "defense", "defense_source")
)

table(id41$treatment)

## Create Lasso DF
id41_lasso <- id41
id41_lasso$id <- seq_len(nrow(id41))


## Select Variables
id41_lasso <- dplyr::select(
  id41_lasso,
  # party variables (summarized in party_id_scale)
  -c(party_id, repub_id_strength, dem_id_strength, ind_id_strength),
  # rank variables (only reversed rank kept)
  -c(rank_climate, rank_democratization, rank_global_health, rank_econ_policy, rank_nukes, rank_terror),
  # metadata, v7 only 0
  -c(ipaddress, v7, start, finish, finished)
)


# Filter Question: keep only rows whose `filter` column equals 4
id41_lasso <- dplyr::filter(id41_lasso, filter == 4)


## Adopt variables 

# Binary treatment indicators: replace missing flags by 0
id41_lasso_flag_cols <- c(
  "enviro_treatment_source", "enviro_treatment",
  "displace_treatment_source", "displace_treatment",
  "defense_treatment_source", "defense_treatment",
  "control"
)
id41_lasso[id41_lasso_flag_cols] <- lapply(
  id41_lasso[id41_lasso_flag_cols],
  function(flag) replace(flag, is.na(flag), 0)
)


## Check Missings 
# Number of NA cells per column, as a named integer vector
id41_na <- vapply(id41_lasso, function(column) sum(is.na(column)), integer(1))


data.frame(id41_na) # no variable with too many missings 

# Row count before listwise deletion
all_id41 <- nrow(id41_lasso)

# Listwise deletion of incomplete rows
id41_lasso <- drop_na(id41_lasso)

# Number of rows removed
all_id41 - nrow(id41_lasso) 


## Adopt Variables

# Single treatment factor
## Collapse the binary arm indicators into one variable; the first flag equal
## to 1 (in the order listed) decides, rows with no flag set get NA.
id41_lasso$treat <- factor(
  with(id41_lasso, dplyr::case_when(
    control == 1 ~ "control",
    enviro_treatment == 1 ~ "enviro",
    enviro_treatment_source == 1 ~ "enviro_source",
    displace_treatment == 1 ~ "displace",
    displace_treatment_source == 1 ~ "displace_source",
    defense_treatment == 1 ~ "defense",
    defense_treatment_source == 1 ~ "defense_source",
    TRUE ~ NA_character_
  )),
  levels = c("control", "enviro", "enviro_source",
             "displace", "displace_source",
             "defense", "defense_source")
)

# drop numeric treatment var and binary treatment indicators 
id41_lasso <- dplyr::select(
  id41_lasso,
  -c(treatment, control, enviro_treatment_source, enviro_treatment,
     displace_treatment_source, displace_treatment,
     defense_treatment_source, defense_treatment)
)


# # Recode Gender 
# id41_lasso$female <- id41_lasso$gender

# Set variables to numeric
data.frame(sapply(id41_lasso, class))

# Recode state to binaries: strip blanks from the labels, create one dummy
# column per distinct value, then remove the source column
id41_lasso$state <- gsub(" ", "", id41_lasso$state)
id41_lasso <- id41_lasso %>%
  fastDummies::dummy_cols(select_columns = "state") %>%
  dplyr::select(-state)

# Recode Party_ID to binaries 
# select_columns names the column to expand; dummy_cols() adds one new binary
# variable for every distinct character value found in that column
id41_lasso$party_id_f <- gsub(" ", "", id41_lasso$party_id_f)
id41_lasso <- id41_lasso %>%
  fastDummies::dummy_cols(select_columns = "party_id_f") %>%
  dplyr::select(-party_id_f)

# final check if numeric or integer
data.frame(sapply(id41_lasso, class))




####################################################################################################################################### #
# ID 57: Does the label really matter? Evidence that the US public continues to doubt "global warming" more than "climate change" ######
####################################################################################################################################### #

id57 <- read.csv(
  "./03_Replication_Data_Files/ID57_SchuldtEnns2017/og_data/31115251.csv",
  sep = ",",
  na.strings = c("NA", "")
)


# Keep only the variables needed for the re-analysis and add a row number
id57_lasso <- dplyr::select(
  id57,
  xparty7, q23, dov_q23, ppage, ppeduc, ppethm, xppracem, ppgender, ppincimp
)
id57_lasso$id1 <- seq_len(nrow(id57_lasso))

## Party identification

# 7-point scale: position of the xparty7 label in the list below
# (1 = Strong Democrat ... 7 = Strong Republican), NA for any other value
id57_lasso$republican <- as.numeric(match(
  id57_lasso$xparty7,
  c("Strong Democrat", "Not Strong Democrat", "Leans Democrat",
    "Undecided/Independent/Other",
    "Leans Republican", "Not Strong Republican", "Strong Republican")
))

# Binary party indicators: 1 if the label belongs to the group, 0 for any
# other label, NA when xparty7 itself is missing
id57_lasso$republicanbin <- ifelse(
  is.na(id57_lasso$xparty7), NA,
  as.numeric(id57_lasso$xparty7 %in%
               c("Leans Republican", "Not Strong Republican", "Strong Republican"))
)
id57_lasso$democratnbin <- ifelse(
  is.na(id57_lasso$xparty7), NA,
  as.numeric(id57_lasso$xparty7 %in%
               c("Strong Democrat", "Not Strong Democrat", "Leans Democrat"))
)
id57_lasso$politmiddlebin <- ifelse(
  is.na(id57_lasso$xparty7), NA,
  as.numeric(id57_lasso$xparty7 %in% "Undecided/Independent/Other")
)



## Education: ordered scale, 1 = "No formal education" ... 14 = doctorate.
# The code is taken from the factor defined here, not from a re-created factor:
# factor(<factor>) would drop empty levels and shift the codes.
id57_lasso$ppeduc <- factor(id57_lasso$ppeduc, levels = c("No formal education", "1st, 2nd, 3rd, or 4th grade", "5th or 6th grade", "7th or 8th grade","9th grade","10th grade","11th grade"  
                                                          ,"12th grade NO DIPLOMA","HIGH SCHOOL GRADUATE - high school DIPLOMA or the equivalent (GED)",  "Some college, no degree", "Associate degree","Bachelors degree" ,"Masters degree" , "Professional or Doctorate degree"))
id57_lasso$educ <- as.numeric(id57_lasso$ppeduc)

#0=white, 1=non-white, NA for any label not listed
id57_lasso$nonwhite <- ifelse(
  id57_lasso$xppracem == "White", 0,
  ifelse(
    id57_lasso$xppracem %in% c("Native Hawaiian/Pacific Islander",
                               "Black or African American",
                               "American Indian or Alaska Native",
                               "2+ races",
                               "Asian"),
    1, NA
  )
)

# female: Male = 0, Female = 1, anything else NA
id57_lasso$female <- ifelse(id57_lasso$ppgender=="Male", 0, 
                            ifelse(id57_lasso$ppgender=="Female", 1, NA))

# Age as a number. Going through as.character() keeps the recorded value;
# as.numeric(as.factor(x)) would return the index of the factor level instead.
# NOTE(review): assumes ppage holds plain numbers - a non-numeric entry becomes
# NA (with a warning) and the row is then removed by drop_na().
id57_lasso$age <- as.numeric(as.character(id57_lasso$ppage))


## Household income: ordered scale, 1 = "Less than $5,000" ... 21 = "$250,000 or more".
# Stored in its own column `income` so that it does not overwrite `educ`.
id57_lasso$ppincimp <- factor(id57_lasso$ppincimp, levels = c("Less than $5,000", "$5,000 to $7,499", "$7,500 to $9,999", "$10,000 to $12,499","$12,500 to $14,999", "$15,000 to $19,999", 
                                                              "$20,000 to $24,999", "$25,000 to $29,999", "$30,000 to $34,999","$35,000 to $39,999", "$40,000 to $49,999", "$50,000 to $59,999",
                                                              "$60,000 to $74,999", "$75,000 to $84,999", "$85,000 to $99,999", "$100,000 to $124,999", "$125,000 to $149,999", "$150,000 to $174,999",
                                                              "$175,000 to $199,999", "$200,000 to $249,999", "$250,000 or more"))
id57_lasso$income <- as.numeric(id57_lasso$ppincimp)

## Treatment and outcome variables

# Wording condition: dov_q23 1 -> "climatechange", 2 -> "globalwarming", else NA
id57_lasso$treat <- as.factor(
  c("climatechange", "globalwarming")[match(id57_lasso$dov_q23, 1:2)]
)

# Outcome from q23; any answer other than the three listed becomes NA
id57_q23_answers <- c("Yes, definitely", "Yes, somewhat", "No")

# binary version: both "Yes" answers -> 1, "No" -> 0
id57_lasso$ccgwhappeningbin <- c(1, 1, 0)[match(id57_lasso$q23, id57_q23_answers)]

# three-step version: definitely -> 2, somewhat -> 1, "No" -> 0
id57_lasso$ccgwhappening <- c(2, 1, 0)[match(id57_lasso$q23, id57_q23_answers)]


# Remove the raw survey columns
id57_lasso <- dplyr::select(
  id57_lasso,
  -c(xparty7, q23, dov_q23, ppage, ppeduc, ppethm ,xppracem, ppgender, ppincimp)
)

summary(id57_lasso)

## Check missings 
# Number of NA cells per column, as a named integer vector
id57_na <- vapply(id57_lasso, function(column) sum(is.na(column)), integer(1))


# Row count before listwise deletion
all_id57 <- nrow(id57_lasso)

# Listwise deletion of incomplete rows
id57_lasso <- drop_na(id57_lasso)

# Number of rows removed (author's note: 10)
all_id57 - nrow(id57_lasso) 

# Print column classes and the final data
data.frame(sapply(id57_lasso, class))
data.frame(id57_lasso)

########################################################################################## #
# ID 71: Doing What Others Do: Norms, Science, and Collective Action on Global Warming ######
########################################################################################## #

#### ID 71: 2010 analysis =====
id71_10 <- read.csv(
  "./03_Replication_Data_Files/id71_BolsenLeeper2014/og_data/gsu2010-data.csv",
  sep = ",",
  na.strings = c("NA", "")
)

# Add a row number, label the experimental condition (codes 1-5, anything else
# becomes NA) and remove the numeric condition code
id71_10_lasso <- id71_10 %>%
  dplyr::mutate(
    id1 = seq_len(nrow(id71_10)),
    treat = as.factor(
      c("control",
        "prodescriptive",
        "prodescriptive+injunctive",
        "condescriptive",
        "condescriptive+injunctive")[match(condition, 1:5)]
    )
  ) %>%
  dplyr::select(-c(condition))

# Row count before listwise deletion
all_id71_10 <- nrow(id71_10_lasso)

# Listwise deletion of incomplete rows
id71_10_lasso <- drop_na(id71_10_lasso)

# Number of rows removed (author's note: 0 NA's)
all_id71_10 - nrow(id71_10_lasso) 

# Print column classes and the final data
data.frame(sapply(id71_10_lasso, class))
data.frame(id71_10_lasso)

#### ### ### ### ### ### ###
#### ID 71: 2011 analysis =====
id71_11 <- read.csv(
  "./03_Replication_Data_Files/id71_BolsenLeeper2014/og_data/gsu2011-data.csv",
  sep = ",",
  na.strings = c("NA", "")
)

# Add a row number, label the experimental condition (codes 1-6, anything else
# becomes NA), convert the covariates to numeric and remove the raw columns
id71_11_lasso <- id71_11 %>%
  dplyr::mutate(
    id1 = seq_len(nrow(id71_11)),
    treat = as.factor(
      c("control", "normnosci", "nonormsci",
        "normsci", "nonormpolsci", "normpolsci")[match(Condition, 1:6)]
    ),
    female = as.numeric(sex),
    liberal = as.numeric(ideology), # the higher the more liberal
    democrat = as.numeric(partyid)  # the higher the more democratic
  ) %>%
  # Email contains 1 missing value; it is removed here so that drop_na() below
  # does not discard that row
  dplyr::select(-c(Email, sex, ideology, partyid, Condition))


## Check missings 
# Number of NA cells per column, as a named integer vector
id71_11_na <- vapply(id71_11_lasso, function(column) sum(is.na(column)), integer(1))

# Row count before listwise deletion
all_id71_11 <- nrow(id71_11_lasso)

# Listwise deletion of incomplete rows
id71_11_lasso <- drop_na(id71_11_lasso)

# Number of rows removed (author's note: 15 = 3 gender, 10 age, 2 AdaptPrevent)
all_id71_11 - nrow(id71_11_lasso) 

# Print column classes and the final data
data.frame(sapply(id71_11_lasso, class))
data.frame(id71_11_lasso)


########################################################################################################################################### #
# ID 73: The impact of elite frames and motivated reasoning on beliefs in a global warming conspiracy: The promise and limits of trust ######
########################################################################################################################################### #


### Load DF_12 ###
id73_12 <- read.dta13(
  "./03_Replication_Data_Files/id73_Saunders2017/og_data/GWCC051017RandP12.dta",
  missing.type = TRUE
)

# Treatment label: QrandB7aabb with all blanks removed
id73_12_lasso <- id73_12
id73_12_lasso$treat <- gsub(" ", "", id73_12_lasso$QrandB7aabb)

id73_12_lasso <- id73_12_lasso %>%
  # variables used in the re-analysis
  dplyr::select(
    white, age, QG9, income, educ_alt, religiosity, QC20, needeval_comb_alt,
    needeval2_new, efficacy, auth, polknow_alt, QB7_combrev, trust_comb, pid7,
    treat, openness, conscientiousness, extraversion, agreeableness, emostab
  ) %>%
  dplyr::mutate(
    # Female = 1, Male = 0, anything else NA
    female = c(1, 0)[match(QG9, c("Female", "Male"))],
    # hoax belief: position of the answer in the level list, NA if not listed
    # NOTE(review): the two middle labels look unusual - verify them against
    # the value labels stored in the data file
    ccgwishoax = as.numeric(factor(QB7_combrev, levels = c("GW/CC is definitely not a hoax", "GW/CC is probably a hoax_(3)", "GW/CC is probably a hoax_(2)", "GW/CC is definitely a hoax"))),
    # party ID as the integer code of its observed levels
    republican = as.numeric(factor(pid7)),
    # federal power: 1 = too little, 2 = about right, 3 = too much
    fedpowertoomuch = as.numeric(factor(QC20, levels = c("Too little power", "About the right amount of power", "Too much power"))),
    treat = as.factor(treat)
  ) %>%
  # remove the raw columns that were recoded above
  dplyr::select(-c(QG9, QB7_combrev, pid7, QC20))

## Check missings 
# Number of NA cells per column, as a named integer vector
id73_12_na <- vapply(id73_12_lasso, function(column) sum(is.na(column)), integer(1))

# Row count before listwise deletion
all_id73_12 <- nrow(id73_12_lasso)

# Listwise deletion of incomplete rows
id73_12_lasso <- drop_na(id73_12_lasso)

# Number of rows removed
all_id73_12 - nrow(id73_12_lasso) 

# Print column classes and the final data
data.frame(sapply(id73_12_lasso, class))
data.frame(id73_12_lasso)


### Load DF_13 ###
id73_13 <- read.dta13(
  "./03_Replication_Data_Files/id73_Saunders2017/og_data/GWCC051017RandP13.dta",
  missing.type = TRUE
)

# Treatment label: QrandB7aabb with all blanks removed
id73_13_lasso <- id73_13
id73_13_lasso$treat <- gsub(" ", "", id73_13_lasso$QrandB7aabb)

id73_13_lasso <- id73_13_lasso %>%
  # variables used in the re-analysis
  dplyr::select(
    white, age, QG9, income, educ_alt, religiosity, QC20, needeval_comb_alt,
    needeval2_new, efficacy, auth, polknow_alt, QB7_combrev, trust_comb, pid7,
    treat, openness, conscientiousness, extraversion, agreeableness, emostab
  ) %>%
  dplyr::mutate(
    # Female = 1, Male = 0, anything else NA
    female = c(1, 0)[match(QG9, c("Female", "Male"))],
    # hoax belief: position of the answer in the level list, NA if not listed
    # NOTE(review): the two middle labels look unusual - verify them against
    # the value labels stored in the data file
    ccgwishoax = as.numeric(factor(QB7_combrev, levels = c("GW/CC is definitely not a hoax", "GW/CC is probably a hoax_(3)", "GW/CC is probably a hoax_(2)", "GW/CC is definitely a hoax"))),
    # party ID as the integer code of its observed levels
    republican = as.numeric(factor(pid7)),
    # federal power: 1 = too little, 2 = about right, 3 = too much
    fedpowertoomuch = as.numeric(factor(QC20, levels = c("Too little power", "About the right amount of power", "Too much power"))),
    treat = as.factor(treat)
  ) %>%
  # remove the raw columns that were recoded above
  dplyr::select(-c(QG9, QB7_combrev, pid7, QC20))

# Author's note: many NA's here, not caused by the recoding above, so they are
# assumed to be genuine missing values
summary(id73_13_lasso)


## Check missings 
# Number of NA cells per column, as a named integer vector
id73_13_na <- vapply(id73_13_lasso, function(column) sum(is.na(column)), integer(1))


# Row count before listwise deletion
all_id73_13 <- nrow(id73_13_lasso)

# Listwise deletion of incomplete rows
id73_13_lasso <- drop_na(id73_13_lasso)

# Number of rows removed
all_id73_13 - nrow(id73_13_lasso) 

# Print column classes and the final data
data.frame(sapply(id73_13_lasso, class))
data.frame(id73_13_lasso)



####################################################################################################### #
# ID 74: A Dirty Word or a Dirty World? Attribute Framing, Political Affiliation, and Query Theory ######
####################################################################################################### #

##Load data study 1
id74_1 <- read.csv(
  "./03_Replication_Data_Files/id74_HardistyJohnson2010/og_data/carbon1-cleaned-and-cut.csv",
  sep = ";",
  na.strings = c("NA", "")
)

# Add a row number, label the framing condition (offset1 "1" -> offset,
# "-1" -> tax, anything else NA) and remove columns not used in the re-analysis
id74_1_lasso <- id74_1 %>%
  dplyr::mutate(
    id1 = seq_len(nrow(id74_1)),
    treat = as.factor(c("offset", "tax")[match(offset1, c("1", "-1"))])
  ) %>%
  dplyr::select(-c(
    serial, None0Demo1Repub2, CHILDREN, Single0L1M2D3W4, occupation, smoke,
    experiment_number, begin_time, end_time, duration_minutes, TaxLin, OffLin,
    offset1, NEPrTOT, indif6, lambda1, indif20, lambda2
  ))

## Author's assumption: co1/2/3/4 = which ticket/brand/option/computer is
## preferred, ps1/2/3/4 = how strongly, mm1/2/3/4 = whether it should be mandatory

## Check missings 
# Number of NA cells per column, as a named integer vector
id74_1_na <- vapply(id74_1_lasso, function(column) sum(is.na(column)), integer(1))


# Row count before listwise deletion
all_id74_1 <- nrow(id74_1_lasso)

# Listwise deletion of incomplete rows
id74_1_lasso <- drop_na(id74_1_lasso)

# Number of rows removed (author's note: 0, this is already the cleaned data)
all_id74_1 - nrow(id74_1_lasso) 

# Print column classes and the final data
data.frame(sapply(id74_1_lasso, class))
data.frame(id74_1_lasso)



##Load data study 2
id74_2 <- read.csv("./03_Replication_Data_Files/id74_HardistyJohnson2010/og_data/carbon2-cleaned-and-cut.csv", sep=",", na.strings = c("NA", "")) 


## Keep only the variables used in the re-analysis.
## Author's note: MRi and MRd are of unknown meaning and produce 22 NA's, so
## they are not included.
# NOTE(review): tax0off1 stays in the frame next to the `treat` factor derived
# from it (study 1 drops its raw code) - confirm this is intended.
id74_2_lasso <- id74_2 %>%
  dplyr::select(c(tax0off1, choice, pref, mand, Ethnicity, HighestEducation, Politicalaffiliation, Conservatism, Agood, Abad, Bgood, Bbad, Neither, EnvGnum, EnvBnum, age))

# Framing condition: tax0off1 "1" -> offset, "0" -> tax, anything else NA
id74_2_lasso$treat <- as.factor(ifelse(id74_2_lasso$tax0off1=="1",  "offset",
                                       ifelse(id74_2_lasso$tax0off1=="0",  "tax", NA)))


# white = 1 for "White", 0 for the other listed groups, NA for anything else.
# The trailing blanks in two labels are part of the values being compared.
id74_2_lasso$white <- ifelse(
  id74_2_lasso$Ethnicity == "White", 1,
  ifelse(
    id74_2_lasso$Ethnicity %in% c("Asian",
                                  "Black or African American",
                                  "Hispanic or Latino ",
                                  "Other",
                                  "Native Hawaiian or Other Pacific Islander ",
                                  "American Indian or Alaska Native"),
    0, NA
  )
)
id74_2_lasso <- id74_2_lasso %>% dplyr::select(-Ethnicity)


# Political affiliation: strip blanks, then one dummy column per distinct value
id74_2_lasso$Politicalaffiliation <- gsub(" ", "", id74_2_lasso$Politicalaffiliation)
id74_2_lasso <- fastDummies::dummy_cols(id74_2_lasso, select_columns = "Politicalaffiliation")
id74_2_lasso <- id74_2_lasso %>% dplyr::select(-Politicalaffiliation)

# Convert the remaining covariates to numeric while keeping their recorded
# values. Going through as.character() avoids the as.numeric(as.factor(x))
# trap, which returns the index of the factor level instead of the value.
# NOTE(review): assumes these columns hold plain numbers - a non-numeric entry
# becomes NA (with a warning) and the row is then removed by drop_na().
id74_2_numeric_cols <- c("age", "Agood", "Abad", "Bgood", "Bbad",
                         "Neither", "EnvGnum", "EnvBnum")
id74_2_lasso[id74_2_numeric_cols] <- lapply(
  id74_2_lasso[id74_2_numeric_cols],
  function(column) as.numeric(as.character(column))
)


## Check missings 
# Number of NA cells per column, as a named integer vector
id74_2_na <- vapply(id74_2_lasso, function(column) sum(is.na(column)), integer(1))


# All observations before dropping NA
all_id74_2 <- nrow(id74_2_lasso)

# Drop all rows with any missing
id74_2_lasso <- id74_2_lasso %>% drop_na()

summary(id74_2_lasso$EnvGnum)

# Total observations removed (author's note: 30)
all_id74_2 - nrow(id74_2_lasso) 

# see all variable structures
data.frame(sapply(id74_2_lasso, class))

data.frame(id74_2_lasso)


##Load data study 3
#id74_3 <- read.csv("id74_HardistyJohnson2010/og_data/carbon3-data.csv", sep=",", na.strings = c("NA", "")) 
#id74_3a <- read.csv("id74_HardistyJohnson2010/og_data/carbon3-aspects.csv", sep=",", na.strings = c("NA", "")) 
# The third study not relevant for replication.


######################################################################################################################## #
#ID 83: Do partisanship and politicization undermine the impact of a scientific consensus message about  climate change?#####
######################################################################################################################## #

### Load DF ###
id83 <- read.dta13("./03_Replication_Data_Files/ID83_BolsenDruckmann2018/og_data/Climate change data - Replication File FINAL.dta")


# In the original df, `condition` indicates whether the respondent received the
# control (= 1) or one of the treatments (= 2-5).
#   treatment   : same coding shifted so that the control group is the baseline (= 0)
#   treatment_f : labelled factor version of `condition`
# Base assignment is used instead of an unqualified mutate(), which resolves to
# plyr::mutate here because plyr is attached after dplyr.
id83$treatment <- id83$condition - 1
id83$treatment_f <- plyr::revalue(
  as.factor(id83$condition),
  c("1" ="Control", "2"= "Consensus", "3"="Politicization", "4"="Warning", "5" ="Correction")
)

## Create Lasso DF
# The treatment columns are taken from id83 in the same select() call, so the
# LASSO frame no longer depends on id83 and id83_lasso having aligned rows.
# (The former running `id` column was dropped by this select() anyway.)
id83_lasso <- id83 %>% 
  dplyr::select(
    #dependent variables
    scienceagree, policy, ccexphuman,
    #controls of main models (partisan and knowledge used as conditions)
    inform, politi, warning, correction, pidrep, dem, rep, knowdummy,
    #controls of appendix models 
    agerange, female, income, minority, education, sciovercome, econenv,
    #treatment indicator (numeric, control = 0) and its labelled factor
    treatment, treatment_f
  )


## Check missings 
# Named integer vector with the number of NA values per column.
# vapply() walks the columns directly, so an empty data frame yields an empty
# result instead of the c(1, 0) iteration that `1:length(x)` would produce.
id83_na <- vapply(id83_lasso, function(column) sum(is.na(column)), integer(1))

data.frame(id83_na)

# All observations before dropping NA
all_id83 <- nrow(id83_lasso)

# Drop all rows with NA's due to panel drop-out and non-response
id83_lasso <- id83_lasso %>% drop_na()

# Total observations removed
all_id83 - nrow(id83_lasso)  

# Partisan-only version of the LASSO df (see original paper): keep the rows
# whose pidrep is not 4. Rows with a missing pidrep are not selected.
# NOTE(review): presumably pidrep == 4 marks non-partisan respondents -- confirm
# against the codebook.
all_id83_lasso_part <- id83_lasso[which(id83_lasso$pidrep != 4), , drop = FALSE]

# Remove any row that still contains an NA
id83_lasso_part <- tidyr::drop_na(all_id83_lasso_part)

## Adopt Variables 

# Print the class of every column of id83_lasso (inspection only, nothing is
# converted here)
data.frame(sapply(id83_lasso, class))

########################################################################################################### #
# ID 86: "Global warming" or "climate change"?: Whether the planet is warming depends on question wording ######
########################################################################################################### #

# Raw data: semicolon-separated; "NA" and empty fields are read as missing and
# text columns are read as factors.
id86 <- read.csv("./03_Replication_Data_Files/ID86_SchuldtKonrath2011/og_data/ms72_weighted.csv", sep=";", na.strings = c("NA", ""),
                 stringsAsFactors = TRUE) 

# Working copy with a running row id and numeric versions of a1-a5 and of the
# two question-wording items gw1_a / gw1_b.
#  * dplyr::mutate is called explicitly: plyr is attached after dplyr, so an
#    unqualified mutate() would resolve to plyr::mutate.
#  * seq_len() replaces 1:nrow(), which yields c(1, 0) for an empty data frame.
#  * gwtreat / cctreat: missing values are replaced by 0 so that the later
#    comparisons against 1-7 never see an NA.
# NOTE(review): as.numeric() on a factor column returns level codes, not the
# printed values -- confirm a1-a5, gw1_a and gw1_b are read as numbers.
id86_lasso <- dplyr::mutate(
  id86,
  id1 = seq_len(nrow(id86)),
  satifiedlife = as.numeric(a1),
  income = as.numeric(a2),
  satisfiedfamilylife = as.numeric(a3),
  satifiedfriends = as.numeric(a4),
  satifieddaily = as.numeric(a5),
  gwtreat = imputeTS::na_replace(as.numeric(gw1_a), 0),
  cctreat = imputeTS::na_replace(as.numeric(gw1_b), 0)
)

# Combined outcome: the answer (1-7) to whichever wording the respondent saw.
# gwtreat is used when it holds a value from 1 to 7, otherwise cctreat is used
# when it does; rows where neither holds such a value get NA.
id86_lasso$gwcchappen <- ifelse(
  id86_lasso$gwtreat %in% 1:7,
  id86_lasso$gwtreat,
  ifelse(id86_lasso$cctreat %in% 1:7, id86_lasso$cctreat, NA)
)

# Treatment arm, derived with the same precedence as the outcome above and
# stored as a factor.
id86_lasso$treat <- as.factor(
  ifelse(
    id86_lasso$gwtreat %in% 1:7,
    "globalwarming",
    ifelse(id86_lasso$cctreat %in% 1:7, "climatechange", NA)
  )
)
# Party identification: the first character of gw2 is read as a number and
# mapped to a label; values other than 1-4 (and NA) become NA.
id86_lasso$p <- as.numeric(substr(id86_lasso$gw2, 1, 1))
id86_lasso$p <- as.factor(
  c("Democrat", "Republican", "Independent", "noDemoRepInd")[match(id86_lasso$p, 1:4)]
)


# Add dummy columns for the party factor
id86_lasso <- fastDummies::dummy_cols(id86_lasso, select_columns = "p")

# Remaining covariates, created (or overwritten in place) in this order.
# NOTE(review): the file is read with stringsAsFactors = TRUE, so as.numeric()
# on a text column returns factor level codes -- confirm that gw3-gw9,
# birthyear, familyincome and gender are coded as this block expects.
id86_lasso <- dplyr::mutate(
  id86_lasso,
  publicenvhelp = as.numeric(gw3),
  educ = as.numeric(gw4),
  # shifted by one so that the lowest code becomes 0
  notimeoutdoortoday = as.numeric(gw5) - 1,
  notimeoutdooryesterday = as.numeric(gw6) - 1,
  notlookatweatherreporttoday = as.numeric(gw7) - 1,
  notlookatweatherreportyesterday = as.numeric(gw8) - 1,
  warmerthanyesterday = as.numeric(gw9),
  # age relative to 2011
  birthyear = as.numeric(birthyear),
  age = as.numeric(2011 - birthyear),
  # 1 = "1 Yes", 0 = "2 No", anything else NA
  borninus = as.numeric(c(1, 0)[match(borninus, c("1 Yes", "2 No"))]),
  # 1 = white, 0 = any of the other four listed categories, anything else NA
  white = as.numeric(
    c(1, 0, 0, 0, 0)[match(
      ethnicity,
      c("1 White/Caucasian",
        "2 Black/African American",
        "3 American Indian or Alaskan Native",
        "4 Asian or Pacific Islander",
        "5 Other")
    )]
  ),
  familyincome = as.numeric(familyincome),
  female = as.numeric(gender) - 1
)

##exclude unused & transformed variables
id86_lasso <- id86_lasso %>%
  dplyr::select(-c(tsend, tsstart, tshhbox, cs_001, oldprim_key, prim_key, hispaniclatino, mexicans1, mexicans2, mexicans3, mexicans4, highesteducation, familyincome_part2, typework, webtv, language, a6, doyouwork, householdmembers, recruitment_type)) %>%
  dplyr::select(-c(a1, a2, a3, a4, a5, gw1_a, gw1_b, gwtreat, cctreat, gw2, p, gw3, gw4, gw5, gw6, gw7, gw8, gw9, birthyear, ethnicity, gender))

# Drop the raw survey columns whose name contains "rr".
# The pattern is matched against the column names of the raw data (id86), not
# of id86_lasso: matched against id86_lasso it also hits the derived covariates
# notlookatweatherreporttoday / notlookatweatherreportyesterday
# ("weatheRReport"), which were then removed together with their sources
# gw7 / gw8.
# NOTE(review): keeping these two covariates can change the number of rows
# removed by the later drop_na() -- re-check the count noted there.
id86_lasso <- id86_lasso[, !(colnames(id86_lasso) %in% grep("rr", colnames(id86), value = TRUE)), drop = FALSE]


# Print the class of every remaining column
data.frame(sapply(id86_lasso, class))

## Check missings 
# Named integer vector with the number of NA values per column.
# vapply() walks the columns directly, so an empty data frame yields an empty
# result instead of the c(1, 0) iteration that `1:length(x)` would produce.
id86_na <- vapply(id86_lasso, function(column) sum(is.na(column)), integer(1))


# All observations before dropping NA
all_id86 <- nrow(id86_lasso)

# Drop all rows with any missing
id86_lasso <- id86_lasso %>% drop_na()

# Total observations removed --> 18
all_id86 - nrow(id86_lasso) 

# see all variable structures
str(id86_lasso)



####################################################################################################################### #
# ID 115: Economic losses or environmental gains? Framing effects on public support for environmental management ######
####################################################################################################################### #

# na.strings: both "NA" and empty fields are read as missing values
id115 <- read.csv("./03_Replication_Data_Files/id115_DeGoliaHiroyasu2019/og_data/journal.pone.0220320.s006.csv", sep=",", na.strings = c("NA", ""))

# Working copy with a running row number stored in a column named `id115`.
# seq_len() replaces 1:nrow(), which yields c(1, 0) for an empty data frame.
id115_lasso <- id115
id115_lasso$id115 <- seq_len(nrow(id115))

#data.frame(sapply(id115, class))

# recode treatment variable and exclude binaries
# The five indicator columns are collapsed into one label; the first indicator
# equal to 1 (in the order listed) wins. case_when() treats a missing
# comparison as "no match" and moves on to the next indicator, whereas a nested
# ifelse() returns NA as soon as an earlier indicator is NA, even if a later
# one equals 1. Rows where no indicator equals 1 get NA.
id115_lasso$treat <- with(
  id115_lasso,
  dplyr::case_when(
    ecogain == 1 ~ "ecogain",
    ecoloss == 1 ~ "ecoloss",
    econgain == 1 ~ "econgain",
    econloss == 1 ~ "econloss",
    control == 1 ~ "control"
  )
)

# Factor with the control group as first (reference) level
id115_lasso$treat <- factor(id115_lasso$treat,
                            levels = c("control", "ecogain", "ecoloss", 
                                       "econgain", "econloss"))

# Remove columns that were already transformed, that are not needed, or that
# contain many NA's and are presumably unused. All removals happen in a single
# select(); the groups below only structure the list.
id115_lasso <- dplyr::select(
  id115_lasso,
  -c(
    # treatment indicators (now in `treat`) and their timing variables
    ecogain, ecoloss, econgain, econloss, control,
    ecogain_time, ecoloss_time, econgain_time, econloss_time, control_time,
    # survey meta data and free-text fields
    Q_TotalDuration, psid, votechoice_text, comments, griz_comments, recother_text, good_complete,
    # male_* items
    male_altruism1, male_altruism2, male_bio1, male_bio2, male_bio3,
    male_ego1, male_ego2, male_ego3, male_altruism3, male_control,
    # female_* items
    female_altruism1, female_altruism2, female_bio1, female_bio2, female_bio3,
    female_ego1, female_ego2, female_ego3, female_altruism3, female_control,
    # test items and reasons for support / opposition
    eco_test, econ_test, why_eco1, supportwhy_eco2, supportwhy_econ1, supportwhy_econ2,
    supportwhy_econ3, supportwhy_other, opposewhy_eco1, opposewhy_cost, opposewhy_other,
    opposewhy_eco2,
    # behaviour items, ac_* / ar_* items and grizzly items
    contactpol, demonstrate, socialmedia, petition, other, volunteer, donate, boycott,
    buycott, buyorganic, conserveenergy, enviroproducts, recycle,
    ac_personal, ac_country, ac_species, ar_personal, ar_industry, ar_government,
    griz_treat1, griz_treat2, griz_support,
    # recreation items
    Q72_33, rec_fishing, rec_hunting, rec_hiking, rec_birding, rec_camping,
    rec_backpacking, rec_ocean, rec_other, rec_climbing, rec_skiing,
    # remaining single columns
    griz_control, species_bbear,
    ideology, occupation
  )
)



# Columns whose name contains "_text" are assumed to be the free-text answers
# of "other" options --> remove every remaining one.
id115_lasso <- dplyr::select(
  id115_lasso,
  -c(grep("_text", colnames(id115_lasso), value = TRUE))
)


# Gender: assumed coding 1 = male, 2 = female (because of the variables
# male_ego/_bio/_altruism etc.), so the new column is 0 = male, 1 = female.
id115_lasso$female <- as.numeric(as.factor(id115_lasso$gender)) - 1
id115_lasso <- dplyr::select(id115_lasso, -gender)


#### age has 157 NA's

# Primary outcome measure: one numeric 1-6 scale built from the two strength
# items.
#   oppose_strength  3 / 2 / 1  ->  1 / 2 / 3
#   support_strength 1 / 2 / 3  ->  4 / 5 / 6
# The opposition score is used when available, otherwise the support score;
# rows with neither get NA.
id115_lasso$support <- local({
  opposed <- c(1, 2, 3)[match(id115_lasso[["oppose_strength"]], c(3, 2, 1))]
  supported <- c(4, 5, 6)[match(id115_lasso[["support_strength"]], c(1, 2, 3))]
  ifelse(!is.na(opposed), opposed, supported)
})


#id115_lasso$pigsproblem #are pigs a problem because of econ. or ecol. consequences or I don't know (guess=3)

# Remove the source items of the outcome and the party variables (other party
# variables remain in the data).
id115_lasso <- dplyr::select(
  id115_lasso,
  -c(oppose_strength, support_strength, project_support, support_letter, oppose_letter,
     ind_lean, dem_strength, rep_strength, polinterest)
)


## Check missings 
# Named integer vector with the number of NA values per column.
# vapply() walks the columns directly, so an empty data frame yields an empty
# result instead of the c(1, 0) iteration that `1:length(x)` would produce.
id115_na <- vapply(id115_lasso, function(column) sum(is.na(column)), integer(1))


data.frame(id115_na)

# All observations before dropping NA
all_id115 <- nrow(id115_lasso)

# Drop all rows with any missing
id115_lasso <- id115_lasso %>% drop_na()

# Total observations removed --> 209!!
all_id115 - nrow(id115_lasso) 


# Print the class of every column, then the cleaned data frame
data.frame(sapply(id115_lasso, class))

data.frame(id115_lasso)




